# -*- coding: utf-8 -*-
"""
Created on Tue Mar 24 10:30:16 2020

@author: Administrator
"""

print('开始采集')
import urllib
import urllib.request
from bs4 import BeautifulSoup
import re
import random
import time
import datetime

# Date stamp (local time, YYYY-MM-DD) written into every collected row.
now_time = datetime.datetime.now().strftime('%Y-%m-%d')

# Matches one price record in the response text. Compiled once because it is
# applied to every record of every page. Capture groups are numbered in the
# order the keys are listed here (1 = yueduibi ... 14 = guige).
_RECORD_PATTERN = re.compile(
    r'"yueduibi":"<span.*?>(.*?)",'
    r'"igid":"(.*?)",'
    r'"yesterday":"<span.*?>(.*?)",'
    r'"chandi":"(.*?)",'
    r'"shichang":"(.*?)",'
    r'"pri":"(.*?)",'
    r'"dtm":"(.*?)",'
    r'"market":"(.*?)",'
    r'"zhouduibi":"<span.*?>(.*?)",'
    r'"jiduibi":"<span.*?>(.*?)",'
    r'"nianduibi":"<span.*?>(.*?)",'
    r'"zoushi":"(.*?)",'
    r'"ycnam":"(.*?)",'
    r'"guige":"(.*?)"'
)

# Capture groups copied into each output row, in output-column order:
# ycnam, guige, shichang, pri, zoushi, zhouduibi, yueduibi, nianduibi.
# NOTE(review): the keys look like pinyin for name / spec / market / price /
# trend / week, month and year comparison -- confirm against the site.
_ROW_GROUPS = (13, 14, 5, 6, 12, 9, 1, 11)

_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"

# Rows collected from all pages; read after the crawl to fill the workbook.
list1 = []
for page_index in range(245):
    # Pause 1-3 seconds between consecutive requests. The original computed
    # this delay but never slept, so the requests were sent back to back.
    if page_index:
        time.sleep(random.uniform(1, 3))

    # Random 16-digit value appended to the "random=0." query parameter.
    random1 = random.randint(1234567891234567, 9876543219876543)
    url = (
        "https://www.yt1998.com/price/nowDayPriceQ!getPriceList.do?random=0."
        + str(random1)
        + "&ycnam=&market=&leibie=&istoday=&spices=&paramName=&paramValue=&pageIndex="
        + str(page_index)
        + "&pageSize=20"
    )
    req = urllib.request.Request(url)
    req.add_header("User-Agent", _USER_AGENT)
    # Close the connection deterministically and give up on a stalled server
    # instead of blocking the whole run forever.
    with urllib.request.urlopen(req, timeout=30) as response:
        html = response.read()

    # No parser is named here, so bs4 chooses one itself, exactly as before.
    soup = BeautifulSoup(html)

    skipped = 0
    # Records are separated by '},{' in the response text.
    for chunk in str(soup).split('},{'):
        user = _RECORD_PATTERN.search(chunk)
        if user is None:
            # A chunk that does not have the expected layout (for example an
            # empty page) must not abort the crawl and lose the rows so far.
            skipped += 1
            continue
        list1.append(list(user.group(*_ROW_GROUPS)) + [now_time, '药通网'])

    if skipped:
        print("未能解析并已跳过的记录数：", skipped)
    print("已爬取药通网页数：", page_index + 1)
    
import openpyxl

# Workbook that accumulates the collected rows; it is loaded, extended and
# saved back to the same path.
WORKBOOK_PATH = 'E:/数据/药材网/日/天药合并表.xlsx'

data = openpyxl.load_workbook(WORKBOOK_PATH)
# Diagnostic output. The deprecated get_named_ranges() / get_sheet_names()
# accessors are replaced by the attributes openpyxl documents for this.
print(data.defined_names)  # defined names stored in the workbook
print(data.sheetnames)  # names of all worksheets

# The rows go to the active worksheet. The original first looked up the first
# sheet by name and then immediately overwrote that with the active sheet, so
# the active sheet is what was actually used.
table = data.active
print(table.title)  # title of the worksheet being written

# Last row index currently reported by the sheet; new rows start right below.
nrows = table.max_row
for row_offset, row_values in enumerate(list1, start=1):
    for column, value in enumerate(row_values, start=1):
        table.cell(row=nrows + row_offset, column=column, value=value)

data.save(WORKBOOK_PATH)
print("今天完成一次数据采集!")
subject = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + " 采集完成"
print(subject)